import json
import pymysql
import requests
from datetime import datetime
from urllib.parse import urljoin
from lxml import html
from bs4 import BeautifulSoup

# 承认签证签发数量
BASE_URL = 'http://cmp.msa.gov.cn/crew_mgt/ShowReport.wx??DISPLAY_TYPE=1&PAGEID=report_admitvisas_query&report1_ALLDATASETS_RECORDCOUNT=report1__default_default_default_key__default_default_default_key%3D78%3B&refreshComponentGuid=report_admitvisas_query_guid_report1&SEARCHREPORT_ID=report1&WX_ISAJAXLOAD=true'
headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json;charset=utf-8'
}
cookies = {
    # OAMAuthnCookie_cmp.msa.gov.cn:80 参数用于登录认证
    'OAMAuthnCookie_cmp.msa.gov.cn:80': 'viWzYETtjZK4sCkKC4M%2B%2BmOvGVCzJgBaVC94ellAeQM%2Bt%2FqT6yWWBWU%2FPTNxLNqmjvhM60dqXIiliqXo259yzUmX1baz73LlqQpG2oU4c5FnJVsBXUQyCcwH%2B4qKApb37NiIl4lf7OdFfjdvdwIW5OC7%2Ba0o1A5eRQgRkTIikwrQ8F59p932rBMkb7KV3y3QsE4xh0paE8rMZm2Fld2m0rsyDuyhWqHESqubIlVnu95OM9b7I9pMhN4uWXmBcIiZSA9BKB7k8hqZtBq%2Fk15XbG9LlO6Z%2FJgIkHdi9wzB1etzEDhOMfkFVmTDBYNkVNA8YjlE0KbX%2FGlttaFOIeotl53UTZsuFshwouoOkDa4LAgGRDl%2FNHNSNuDJk4XkXV4egKodQGul%2F0jwKVnW5vQb4PW%2B31c6Llo2F46R6AKfkbIwjzvKKFUlgJBADeXBS8abfR74XWDlmBFdEZ6%2BszgPiTStt2pxD3P2ntLzqUQQzwo%3D',

    # OAMAuthnHintCookie和JSESSIONID 参数用于声明返回接口数据，不配置的会一直返回HTML
    'OAMAuthnHintCookie': '1',
    'JSESSIONID': '-LtvxkHcVN2kB95Igtu0Ydbg1WIG08G7nmSlcBp9U4ZKkh8s4k5J!49340319'
}

session = requests.Session()
response_lo = session.post(BASE_URL, headers=headers, cookies=cookies)
hrml_content = response_lo.text
print(hrml_content)
if response_lo.status_code == 200:
    soup = BeautifulSoup(hrml_content, 'lxml')
    # 获取html中所有的table
    # tables = soup.find_all('table')
    # 获取html中指定的table
    tables = soup.find_all('table', class_="table table-hover table-striped cls-data-table")
    # 获取table中所有的tr
    rows = tables[0].find_all('tr')
    # print(">>>")
    # print(rows)
    # print(tables)

    dataStr = []
    # 循环获取td里面div的值
    for row in rows:
        data = []
        strTds = row.find_all('td')
        for strTd in strTds:
            cols = strTd.find_all('div')
            cols = [ele.text.strip() for ele in cols]
            data.append(cols[0])

        dataStr.append(data)

    # 删除dataStr里面的空列，注意del一次只能删除一列
    del dataStr[0]

    # 数据库连接配置
    db_config = {
        'host': '191.254.8.220',
        'user': 'root',
        'password': 'Szmsa@520',
        'db': 'spiders',
        'charset': 'utf8mb4',
        'cursorclass': pymysql.cursors.DictCursor
    }
    # 连接数据库
    connection = pymysql.connect(**db_config)

    try:
        # 开始事务
        with connection.cursor() as cursor:
            # 拼装需要插入字段对应的%号个数
            dataValue = ", ".join(['%s'] * len(data))
            print(dataStr)
            sql = f"INSERT INTO xt_mariner_visa_admit (serial_number,user_name,id_number,bid_number,declarant,bid_org,bid_date,status,transactor) VALUES ({dataValue})"
            connection.cursor().executemany(sql, dataStr)
            connection.commit()
    finally:
        connection.close()
